Python for Bioinformatics

This Jupyter notebook is intended to be used alongside the book Python for Bioinformatics.

Chapter 3: Basic Programming: Data Types

STRINGS



In [2]:

    
"This is a string in Python"
'This is a string in Python'
'''This is a string in Python'''
"""This is a string in Python"""









    Out[2]:





'This is a string in Python'



In [4]:

    
# Either quote style can contain the other kind of quote without escaping:
# an ASCII single quote inside double quotes, and double quotes inside single quotes.
"A single quote (') inside a double quote"
'Here we have "double quotes" inside single quotes'









    Out[4]:





'Here we have "double quotes" inside single quotes'



In [5]:

    
"Mixing quotes leads to the dark side'









    



  File "<ipython-input-5-392baaada2f8>", line 1
    "Mixing quotes leads to the dark side'
                                          ^
SyntaxError: EOL while scanning string literal



In [6]:

    
"""Hi! I'm a
multiline
          string"""









    Out[6]:





"Hi! I'm a\nmultiline\n          string"



In [7]:

    
"Hi! I'm a\nmultiline\n          string"









    Out[7]:





"Hi! I'm a\nmultiline\n          string"

Strings are sequences of Unicode characters



In [4]:

    
'In Python 3, strings are Unicode: こんにちは 世界'









    Out[4]:





'In Python 3, strings are Unicode: こんにちは 世界'

String Manipulation



In [4]:

    
signal_peptide = 'MASKATLLLAFTLLFATCIA'



In [5]:

    
signal_peptide.lower()









    Out[5]:





'maskatlllaftllfatcia'



In [6]:

    
signal_peptide









    Out[6]:





'MASKATLLLAFTLLFATCIA'



In [7]:

    
signal_peptide = signal_peptide.lower()
signal_peptide









    Out[7]:





'maskatlllaftllfatcia'



In [8]:

    
dna_seq = 'GCTAGTAATGTG'
m_rna_seq = dna_seq.replace('T','U')
m_rna_seq









    Out[8]:





'GCUAGUAAUGUG'



In [9]:

    
dna_seq









    Out[9]:





'GCTAGTAATGTG'



In [10]:

    
# GC content: the share of C and G bases in dna_seq, expressed as a percentage.
# dna_seq is the sequence defined in an earlier cell.
c = dna_seq.count("C")
g = dna_seq.count("G")
(c+g)/len(dna_seq)*100









    Out[10]:





41.66666666666667



In [11]:

    
m_rna_seq









    Out[11]:





'GCUAGUAAUGUG'



In [12]:

    
m_rna_seq.find('AUG')









    Out[12]:





7



In [13]:

    
m_rna_seq.find('GGG')









    Out[13]:





-1



In [14]:

    
'This string has words separated by spaces'.split()









    Out[14]:





['This', 'string', 'has', 'words', 'separated', 'by', 'spaces']



In [15]:

    
"Alex Doe,5555-2333,nobody@example.com".split()









    Out[15]:





['Alex', 'Doe,5555-2333,nobody@example.com']



In [16]:

    
"Alex Doe,5555-2333,nobody@example.com".split(",")









    Out[16]:





['Alex Doe', '5555-2333', 'nobody@example.com']



In [17]:

    
''.join(['A','C','A','T'])









    Out[17]:





'ACAT'

Lists

List Is a Basic Datatype in Python



In [18]:

    
'Alex Doe,5555-2333,hi@example.com'.split(',')









    Out[18]:





['Alex Doe', '5555-2333', 'hi@example.com']



In [19]:

    
first_list = [1, 2, 3, 4, 5]



In [20]:

    
other_list = [1, 'two', 3, 4, 'last']



In [21]:

    
nested_list = [1, 'two', first_list, 4, 'last']
nested_list









    Out[21]:





[1, 'two', [1, 2, 3, 4, 5], 4, 'last']



In [22]:

    
empty_list = []
empty_list









    Out[22]:





[]



In [23]:

    
first_list = [1, 2, 3, 4, 5]
first_list[0]









    Out[23]:





1



In [24]:

    
first_list[1]









    Out[24]:





2



In [25]:

    
first_list = [1, 2, 3, 4, 5]
first_list[-1]









    Out[25]:





5



In [26]:

    
first_list[-4]









    Out[26]:





2



In [27]:

    
aseq = "atggctaggc"
list(aseq)









    Out[27]:





['a', 't', 'g', 'g', 'c', 't', 'a', 'g', 'g', 'c']



In [28]:

    
samples = ['red'] * 5
samples









    Out[28]:





['red', 'red', 'red', 'red', 'red']



In [29]:

    
samples = [None] * 5
samples









    Out[29]:





[None, None, None, None, None]



In [30]:

    
a = [0, 1, 2, 3, 4, 5]



In [31]:

    
[3*x for x in a]









    Out[31]:





[0, 3, 6, 9, 12, 15]



In [32]:

    
animals = ['  King Kong', '  Godzilla ', 'Gamera  ']
[x.strip() for x in animals]









    Out[32]:





['King Kong', 'Godzilla', 'Gamera']



In [33]:

    
animals = ['  King Kong', '  Godzilla ', 'Gamera  ']
[x.strip() for x in animals if 'i' in x]









    Out[33]:





['King Kong', 'Godzilla']

Modifying Lists



In [34]:

    
first_list.append(99)
first_list









    Out[34]:





[1, 2, 3, 4, 5, 99]



In [35]:

    
first_list.insert(2,50)
first_list









    Out[35]:





[1, 2, 50, 3, 4, 5, 99]



In [36]:

    
first_list.extend([6,7,8])
first_list









    Out[36]:





[1, 2, 50, 3, 4, 5, 99, 6, 7, 8]



In [37]:

    
[1,2,3]+[4,5]









    Out[37]:





[1, 2, 3, 4, 5]



In [38]:

    
first_list









    Out[38]:





[1, 2, 50, 3, 4, 5, 99, 6, 7, 8]



In [39]:

    
first_list.pop()









    Out[39]:





8



In [40]:

    
first_list.pop(2)









    Out[40]:





50



In [41]:

    
first_list









    Out[41]:





[1, 2, 3, 4, 5, 99, 6, 7]



In [42]:

    
first_list.remove(99)
first_list









    Out[42]:





[1, 2, 3, 4, 5, 6, 7]



In [43]:

    
first_list









    Out[43]:





[1, 2, 3, 4, 5, 6, 7]



In [44]:

    
first_list.remove(10)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-44-e4a905a58ced> in <module>()
----> 1 first_list.remove(10)

ValueError: list.remove(x): x not in list



In [45]:

    
# Plain assignment does not copy a list: b becomes a second name for the
# same list object as a, so popping from b also shortens a.
a = [1, 2, 3]
b = a
b.pop()









    Out[45]:





3



In [46]:

    
a









    Out[46]:





[1, 2]



In [47]:

    
# copy.copy() builds a new (shallow) list, so popping from b leaves a untouched.
import copy
a = [1, 2, 3]
b = copy.copy(a)
b.pop()









    Out[47]:





3



In [48]:

    
a









    Out[48]:





[1, 2, 3]



In [49]:

    
# A full slice a[:] also yields a new list, so a is unaffected by b.pop().
a = [1, 2, 3]
b = a[:]
b.pop()









    Out[49]:





3



In [50]:

    
a









    Out[50]:





[1, 2, 3]



In [51]:

    
point = (23, 56, 11)



In [52]:

    
point.append(3)









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-52-690c246d9903> in <module>()
----> 1 point.append(3)

AttributeError: 'tuple' object has no attribute 'append'



In [53]:

    
point.pop()









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-53-f0dcbfa25f33> in <module>()
----> 1 point.pop()

AttributeError: 'tuple' object has no attribute 'pop'

Common Properties of the Sequences



In [5]:

    
point = (23, 56, 11)
point[0]









    Out[5]:





23



In [55]:

    
point[1]









    Out[55]:





56



In [8]:

    
my_sequence = 'MRVLLVALALLALAASATS'
my_sequence[0]









    Out[8]:





'M'



In [65]:

    
my_sequence[5]









    Out[65]:





'V'



In [2]:

    
parameters = ['UniGene', 'dna', 'Mm.248907', 5]
parameters[2]









    Out[2]:





'Mm.248907'



In [6]:

    
point[-1]









    Out[6]:





11



In [61]:

    
point[-2]









    Out[61]:





56



In [66]:

    
my_sequence[-2]









    Out[66]:





'T'



In [67]:

    
my_sequence[-4]









    Out[67]:





'S'



In [9]:

    
my_sequence[-1]









    Out[9]:





'S'



In [69]:

    
seqdata = ('MRVLLVALALLA', 12, '5FE9EEE8EE2DC2C7')
seqdata[0][5]









    Out[69]:





'V'



In [70]:

    
my_sequence="Python"
my_sequence[0:2]









    Out[70]:





'Py'



In [71]:

    
my_sequence[:2]









    Out[71]:





'Py'



In [72]:

    
my_sequence="Python"
my_sequence[4:6]









    Out[72]:





'on'



In [73]:

    
my_sequence[4:]









    Out[73]:





'on'



In [74]:

    
my_sequence[1:5]









    Out[74]:





'ytho'



In [75]:

    
my_sequence[1:5:2]









    Out[75]:





'yh'



In [76]:

    
my_sequence[::-1]









    Out[76]:





'nohtyP'



In [77]:

    
point = (23, 56, 11)
11 in point









    Out[77]:





True



In [78]:

    
my_sequence = 'MRVLLVALALLALAASATS'
'X' in my_sequence









    Out[78]:





False



In [79]:

    
point = (23, 56, 11)
point2 = (2, 6, 7)
point + point2









    Out[79]:





(23, 56, 11, 2, 6, 7)



In [80]:

    
dna_seq = 'ATGCTAGACGTCCTCAGATAGCCG'
tata_box = 'TATAAA'
tata_box + dna_seq









    Out[80]:





'TATAAAATGCTAGACGTCCTCAGATAGCCG'



In [81]:

    
point + tata_box









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-81-5fab3f8bc73f> in <module>()
----> 1 point + tata_box

TypeError: can only concatenate tuple (not "str") to tuple



In [82]:

    
point = (23, 56, 11)
len(point)









    Out[82]:





3



In [83]:

    
my_sequence = 'MRVLLVALALLALAASATS'
len(my_sequence)









    Out[83]:





19



In [84]:

    
point









    Out[84]:





(23, 56, 11)



In [85]:

    
max(point)









    Out[85]:





56



In [86]:

    
min(point)









    Out[86]:





11



In [87]:

    
my_sequence = 'MRVLLVALALLALAASATS'
max(my_sequence)









    Out[87]:





'V'



In [88]:

    
min(my_sequence)









    Out[88]:





'A'



In [89]:

    
tata_box = 'TATAAA'
list(tata_box)









    Out[89]:





['T', 'A', 'T', 'A', 'A', 'A']

Dictionaries



In [90]:

    
iupac = {'A':'Ala','C':'Cys','E':'Glu'}
print('C stands for the amino acid {0}'.format(iupac['C']))









    



C stands for the amino acid Cys



In [91]:

    
iupac['E']









    Out[91]:





'Glu'



In [5]:

    
rgb = [('red','ff0000'), ('green','00ff00'), ('blue','0000ff')]
colors_d = dict(rgb)
colors_d









    Out[5]:





{'blue': '0000ff', 'green': '00ff00', 'red': 'ff0000'}



In [93]:

    
rgb = dict(red='ff0000', green='00ff00', blue='0000ff')
rgb









    Out[93]:





{'blue': '0000ff', 'green': '00ff00', 'red': 'ff0000'}



In [94]:

    
rgb = {}
rgb['red'] = 'ff0000'
rgb['green'] = '00ff00'
rgb









    Out[94]:





{'green': '00ff00', 'red': 'ff0000'}



In [95]:

    
len(iupac)









    Out[95]:





3



In [96]:

    
iupac['S'] = 'Ser'
len(iupac)









    Out[96]:





4



In [97]:

    
iupac = {'A':'Ala','C':'Cys','E':'Glu'}
iupac









    Out[97]:





{'A': 'Ala', 'C': 'Cys', 'E': 'Glu'}



In [98]:

    
iupac['X'] = 'Xaa'
iupac









    Out[98]:





{'A': 'Ala', 'C': 'Cys', 'E': 'Glu', 'X': 'Xaa'}



In [99]:

    
from collections import OrderedDict
d = OrderedDict()
d['a'] = 'A'
d['b'] = 'B'
d['c'] = 'C'
d









    Out[99]:





OrderedDict([('a', 'A'), ('b', 'B'), ('c', 'C')])



In [100]:

    
iupac









    Out[100]:





{'A': 'Ala', 'C': 'Cys', 'E': 'Glu', 'X': 'Xaa'}



In [101]:

    
iupac.keys()









    Out[101]:





dict_keys(['A', 'E', 'X', 'C'])



In [102]:

    
iupac.values()









    Out[102]:





dict_values(['Ala', 'Glu', 'Xaa', 'Cys'])



In [103]:

    
iupac.values()









    Out[103]:





dict_values(['Ala', 'Glu', 'Xaa', 'Cys'])



In [104]:

    
iupac.keys()









    Out[104]:





dict_keys(['A', 'E', 'X', 'C'])



In [105]:

    
# keys() and values() return views tied to the dictionary, not snapshots:
# after the pop below, both iupac_keys and iupac_vals no longer show the 'X' entry.
iupac_keys = iupac.keys()
iupac_vals = iupac.values()
iupac.pop('X')









    Out[105]:





'Xaa'



In [106]:

    
iupac_keys









    Out[106]:





dict_keys(['A', 'E', 'C'])



In [107]:

    
iupac_vals









    Out[107]:





dict_values(['Ala', 'Glu', 'Cys'])



In [15]:

    
iupac = {'E': 'Glu', 'X': 'Xaa', 'C': 'Cys', 'A': 'Ala'}
iupac.items()









    Out[15]:





dict_items([('E', 'Glu'), ('A', 'Ala'), ('X', 'Xaa'), ('C', 'Cys')])



In [109]:

    
iupac = {'E': 'Glu', 'X': 'Xaa', 'C': 'Cys', 'A': 'Ala'}
iupac.get('A','No translation available')









    Out[109]:





'Ala'



In [110]:

    
iupac.get('Z','No translation available')









    Out[110]:





'No translation available'



In [17]:

    
# With no default given, get() returns None for the missing key 'Z',
# which is why this cell displays no Out[] value.
iupac.get('Z')



In [114]:

    
iupac = {'E': 'Glu', 'X': 'Xaa', 'C': 'Cys', 'A': 'Ala'}
del iupac['A']
iupac









    Out[114]:





{'C': 'Cys', 'E': 'Glu', 'X': 'Xaa'}



In [117]:

    
first_set = {'CP0140.1','XJ8113.5','EF3616.3'}



In [118]:

    
first_set = set()
first_set.add('CP0140.1')
first_set.add('XJ8113.5')
first_set.add('EF3616.3')
first_set









    Out[118]:





{'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [119]:

    
{2*x for x in [1,2,3]}









    Out[119]:





{2, 4, 6}



In [120]:

    
first_set.add('CP0140.1')
first_set









    Out[120]:





{'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [121]:

    
{2*x for x in [1,1,2,2,3,3]}









    Out[121]:





{2, 4, 6}



In [122]:

    
uniques = {2,2,3,4,5,3}
uniques









    Out[122]:





{2, 3, 4, 5}



In [123]:

    
first_set = {'CP0140.1','XJ8113.5','EF3616.3'}
other_set = {'EF3616.3'}
common = first_set.intersection(other_set)
common









    Out[123]:





{'EF3616.3'}



In [124]:

    
common = first_set & other_set
common









    Out[124]:





{'EF3616.3'}



In [125]:

    
first_set = {'CP0140.1','XJ8113.5','EF3616.3'}
other_set = {'AB7416.2'}
first_set.union(other_set)









    Out[125]:





{'AB7416.2', 'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [126]:

    
first_set | other_set









    Out[126]:





{'AB7416.2', 'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [127]:

    
first_set.difference(other_set)









    Out[127]:





{'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [128]:

    
first_set - other_set









    Out[128]:





{'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [129]:

    
other_set - first_set









    Out[129]:





{'AB7416.2'}



In [130]:

    
first_set.symmetric_difference(other_set)









    Out[130]:





{'AB7416.2', 'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [132]:

    
first_set ^ other_set









    Out[132]:





{'AB7416.2', 'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [133]:

    
first_set









    Out[133]:





{'CP0140.1', 'EF3616.3', 'XJ8113.5'}



In [134]:

    
list(first_set)









    Out[134]:





['XJ8113.5', 'CP0140.1', 'EF3616.3']



In [135]:

    
fs = frozenset(['a','b'])
fs









    Out[135]:





frozenset({'a', 'b'})



In [136]:

    
fs.remove('a')









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-136-539d6be46eaf> in <module>()
----> 1 fs.remove('a')

AttributeError: 'frozenset' object has no attribute 'remove'



In [137]:

    
fs.add('c')









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-137-76462344c7ad> in <module>()
----> 1 fs.add('c')

AttributeError: 'frozenset' object has no attribute 'add'

Naming Objects



In [1]:

    
23crm = "1"    # Start with a number
23 = "1"       # Is a number literal, not a name
Var? = "value" # Has an invalid character (?)
$five = 5      # Has an invalid character ($)
for = 123      # Is a reserved word
if = "data"    # Is a reserved word









    



  File "<ipython-input-1-967416013cf3>", line 1
    23crm = "1"    # Start with a number
        ^
SyntaxError: invalid syntax



In [142]:

    
# One valid name bound to each of the data types used in this chapter.
my_sequence = 'MRVLLVALALLALAASATS'                # str
first_list = [1, 2, 3, 4, 5]                       # list
d = {1: 'a', 2: 'b', 3: 'c'}                       # dict
k = d.keys()                                       # view over the keys of d
point = (23, 56, 11)                               # tuple
first_set = {'CP0140.1', 'XJ8113.5', 'EF3616.3'}   # set
fs = frozenset(['a', 'b'])                         # frozenset



In [10]:

    
# b is built from the current value of a; rebinding a to another number
# afterwards does not change what is stored in b.
a = 3
b = [1,2,a]



In [11]:

    
b









    Out[11]:





[1, 2, 3]



In [145]:

    
a = 5
b









    Out[145]:





[1, 2, 3]



In [146]:

    
# d's last element is the list c itself (not a copy), so mutating c in place
# (for example with c.pop()) is visible through d.
c = [1, 2, 3]
d = [5, 6, c]



In [147]:

    
c









    Out[147]:





[1, 2, 3]



In [148]:

    
d









    Out[148]:





[5, 6, [1, 2, 3]]



In [149]:

    
c.pop()









    Out[149]:





3



In [150]:

    
c









    Out[150]:





[1, 2]



In [151]:

    
d









    Out[151]:





[5, 6, [1, 2]]



In [152]:

    
a = 3
b = [1, 2, a]



In [153]:

    
b









    Out[153]:





[1, 2, 3]



In [154]:

    
a = 5
b









    Out[154]:





[1, 2, 3]



In [13]:

    
c = [1, 2, 3]
d = [5, 6, c]



In [156]:

    
c









    Out[156]:





[1, 2, 3]



In [157]:

    
d









    Out[157]:





[5, 6, [1, 2, 3]]



In [158]:

    
c.pop()









    Out[158]:





3



In [160]:

    
print(c)









    



[1, 2]



In [14]:

    
print(d)









    



[5, 6, [1, 2]]